##############################################################################
# File-Name: 00_process_data.r
# Purpose: Process the raw anonymized survey data
# Output: data/all_processed_data.rds (built from data/all_raw_data.rds)
# Machine: macOS Ventura 13.5.1
# R version 4.3.1 
##############################################################################

# Basic packaging ------------------------------------------------------------------
## Packages are loaded with pacman::p_load().
## If you do not have pacman installed, uncomment the next line and run it once.
#install.packages("pacman")
pacman::p_load(tidyverse, scales, lubridate, here, janitor, 
               rebus, RColorBrewer, wesanderson, psych)


# Functions for cleaning ---------------------------------------------------------------

# short view of the data
# Open the first 50 rows of `data` in the data viewer.
#   data: a data frame (or tibble) to inspect.
# Called for its side effect only (View()).
short_view <- function(data) {
    first_rows <- slice(data, 1:50)
    View(first_rows)
}

# convert seconds to minutes
# Convert durations from seconds to minutes.
#   x: numeric vector of durations in seconds.
# Returns a numeric vector of the same length, expressed in minutes.
min_conv <- function(x) {
    seconds_per_minute <- 60
    x / seconds_per_minute
}

# function to calculate pca in a tidyverse style
# Append a first-principal-component score and Cronbach's alpha to `data`.
#   data:        data frame holding the item columns.
#   var_input:   character vector with the names of the item columns.
#   scores_name: name of the new column receiving the first-component scores.
#   alpha_name:  name of the new column receiving the standardized alpha
#                (one value, repeated on every row).
# Returns `data` with the two new columns bound at the end.
tidy_pca <- function(data, var_input, scores_name, alpha_name) {
    items <- data[var_input]
    # check.keys has to be a named logical argument. The previous call passed
    # the string 'check.keys=TRUE' positionally, which psych::alpha() receives
    # as `keys`, so automatic reversal of negatively keyed items was never on.
    reliability <- psych::alpha(items, check.keys = TRUE)$total$std.alpha
    # Keep the fit under its own name so the prcomp() function is not shadowed.
    pca_fit <- prcomp(items)
    component_scores <- pca_fit$x[, 1]
    bind_cols(
        data,
        as_tibble_col(component_scores, column_name = scores_name),
        as_tibble_col(reliability, column_name = alpha_name)
    )
}

# function to calculate z score in a tidyverse style
# Standardize a vector to z-scores (centered, then divided by its standard
# deviation).
#   x: numeric vector.
# Returns a plain numeric vector: scale() yields a one-column matrix, so the
# column is extracted and its attributes dropped.
center_scale <- function(x) {
    standardized <- scale(x, center = TRUE, scale = TRUE)
    as.numeric(standardized[, 1])
}

# function to build index from zscores in a tidyverse style
# Build an additive index by summing a set of columns row by row.
#   data:   data frame holding the (already standardized) columns.
#   vars:   character vector with the names of the columns to sum.
#   output: name of the new column receiving the row sums.
# Returns `data` with the new column bound at the end. A row with a missing
# value in any of `vars` gets NA, because sum() is called without na.rm.
zscores_index <- function(data, vars, output) {
    row_totals <- data %>%
        rowwise() %>%
        # all_of() makes explicit that `vars` is an external character vector;
        # a bare name is deprecated in tidyselect and would silently pick a
        # column called "vars" if the data had one.
        mutate(score = sum(c_across(all_of(vars)))) %>%
        ungroup() %>%
        pull(score)

    bind_cols(data, as_tibble_col(row_totals, column_name = output))
}

# Open the data -----------------------------------------------------------
# Load the raw survey data from data/all_raw_data.rds (path resolved by here()).
d <- read_rds(here("data", "all_raw_data.rds"))

# Recode vars -------------------------------------------------------------
## Several of the steps below recode the textual responses from Portuguese to English.


# Recode race: build the list of options.
# names(table(.)) returns the observed response labels; the positions used
# below refer to that ordering, so the mapping is only valid for this data set.
names_race <- names(table(d$q_race))
# English label (name) -> original label (value), the form fct_recode() expects.
# Repeated names collapse several original labels into one English category.
race_list <- list("White"=names_race[[1]], 
                  "White"=names_race[[2]], 
                  "Non-White"=names_race[[3]], 
                  "Non-White"=names_race[[5]], 
                  "Non-White"=names_race[[6]],
                  "Non-White"=names_race[[8]], 
                  "Prefer not to answer"=names_race[[4]], 
                  "Prefer not to answer"= names_race[[7]])

# Recode income: reorder the observed income labels (positions 9, 6, 1-5, 7-8).
# NOTE(review): presumably this puts the brackets in ascending order - confirm
# against the raw labels.
names_income <- names(table(d$income))
income_recode <- c(names_income[9], names_income[6], names_income[1:5], names_income[7:8])

# Apply both recodes:
#  - q_race gets the English labels, with levels ordered
#    "Prefer not to answer", "Non-White", "White" (reverse order of first
#    appearance in race_list);
#  - income_num is the integer position of each income label after reordering.
d <- d %>%
    mutate(q_race=fct_relevel(fct_recode(q_race, !!!race_list), rev(unique(names(race_list)))),
           income_num=as.numeric(fct_relevel(income, income_recode)))


# Subjective well-being ---------------------------------------------------

# list of textual options and order accordingly
# Observed response labels of the well-being items, in the order returned by
# table(); the positions below refer to that ordering.
names_sb <- names(table(d$q_sub_wb_1))
# English label (name) -> original label (value), listed from "always" to "never".
sub_wellbeing_list <- list(
    "always" = names_sb[[5]],
    "frequently" = names_sb[[2]],
    "sometimes" = names_sb[[1]],
    "rarely" = names_sb[[4]],
    "never" = names_sb[[3]]
)

# Recode every well-being item to English, order the levels always -> never,
# then reverse them so the final level order runs never -> always.
d <- d %>%
    mutate(across(
        contains("q_sub_wb"),
        ~ fct_rev(fct_relevel(
            fct_recode(.x, !!!sub_wellbeing_list),
            names(sub_wellbeing_list)
        ))
    )) %>%
    # Descriptive names make the item-by-item regressions easier to read.
    rename(
        q_well_being_happy = q_sub_wb_1,
        q_well_being_depressed = q_sub_wb_2,
        q_well_being_anxious = q_sub_wb_3,
        q_well_being_isolated = q_sub_wb_4,
        q_well_being_satisfied = q_sub_wb_5
    )

# Build index
# revert for the same direction across all items
# invert depressed, anxious, isolared
# higher values means higher subjective well-being
# Put all five items on the same direction (higher = higher well-being) and
# create numeric versions of them.
d <- d %>%
    mutate(
        # Negative items are reversed into "<item>_rev" columns.
        across(
            c(q_well_being_anxious, q_well_being_depressed, q_well_being_isolated),
            ~ fct_rev(.x),
            .names = "{col}_rev"
        ),
        # Positive items are copied under the same suffix so that all five can
        # be selected together in the next step.
        q_well_being_happy_rev = q_well_being_happy,
        q_well_being_satisfied_rev = q_well_being_satisfied
    ) %>%
    # Integer level codes of every "_rev" column, stored as "<item>_rev_numeric".
    mutate(across(contains("_rev"), ~ as.numeric(.x), .names = "{col}_numeric")) %>%
    # Drop the "_rev" infix from the numeric columns: "<item>_rev_numeric"
    # becomes "<item>_numeric" (the plain "_rev" factor columns keep their name).
    rename_with(~ str_replace_all(.x, "_rev_", "_"), contains("_rev"))



# Check: high numbers should align with high subjective well-being.
# 1 means always depressed -> low SWB; 5 means never depressed -> high SWB.
table(d$q_well_being_depressed_numeric, d$q_well_being_depressed)
# 5 means always happy -> high SWB.
table(d$q_well_being_happy_numeric, d$q_well_being_happy)


# select subjective well-being vars
# Numeric well-being items that enter the index.
subj_vars <- c("q_well_being_depressed_numeric",
               "q_well_being_happy_numeric",
               "q_well_being_anxious_numeric",
               "q_well_being_satisfied_numeric",
               "q_well_being_isolated_numeric")

# Subjective well-being index.
# all_of() is used wherever `subj_vars` selects columns: selecting with a bare
# external character vector is deprecated in tidyselect.
d <- d %>%
    # Replace each item by its z-score (the *_numeric columns are overwritten).
    mutate(across(all_of(subj_vars), center_scale)) %>%
    # Sum of the z-scores -> swb_zcore.
    zscores_index(subj_vars, "swb_zcore") %>%
    # First principal component and standardized alpha.
    tidy_pca(subj_vars, scores_name = "swb_pcaindex", alpha_name = "swb_alpha")

# Double check.
d %>% select(all_of(subj_vars), contains("swb"))


# Digital literacy --------------------------------------------------------

# get the names of digital literacy responses
# Observed response labels of the first digital literacy item.
dl_names <- names(table(d$q_digital_lit_1))


# Clean the digital literacy items.
d <- d %>%
    # Strip the textual anchors from the scale end points and turn each item
    # into a factor.
    mutate(across(
        contains("q_digital_lit"),
        ~ as.factor(str_remove_all(.x, "- nao concordo nada| - concordo muito"))
    )) %>%
    # Items 1 and 4 are reversed so that higher values = higher digital literacy.
    mutate(
        q_digital_lit_1 = fct_rev(q_digital_lit_1),
        q_digital_lit_4 = fct_rev(q_digital_lit_4)
    ) %>%
    # Integer level codes of each factor.
    mutate(across(contains("q_digital_lit"), ~ as.numeric(.x)))

# Make sure the inversion is correct.
d %>% select(contains("digital_lit"))

# build z-score index
# Digital literacy items that enter the index.
dl_vars <- c("q_digital_lit_1",
             "q_digital_lit_2",
             "q_digital_lit_3",
             "q_digital_lit_4")

# Digital literacy index.
# all_of() replaces the deprecated selection with a bare external vector.
d <- d %>%
    # Replace each item by its z-score.
    mutate(across(all_of(dl_vars), center_scale)) %>%
    # Sum of the z-scores -> dl_zcore.
    zscores_index(dl_vars, "dl_zcore") %>%
    # First principal component and standardized alpha.
    tidy_pca(dl_vars, scores_name = "dl_pcaindex", alpha_name = "dl_alpha")

# Headline tasks ----------------------------------------------------------

# Fixing the name of the fourth task, which is false. 
# The fourth headline is a false story, so its columns move from the "t_"
# prefix to the "f_" prefix.
d <- d %>%
    rename(f_headline_4_acc = t_headline_4_acc,
           f_headline_4_exp = t_headline_4_exp)


# Recode numerically.
# Naming logic: {task_type}_{number_of_the_task}_{respondent's_accuracy_answer}.
# E.g. false_1_false = 1 means the respondent answered "false" to the first
# false item of the headline task.

d <- d %>%
    mutate(false_1_false = ifelse(f_headline_1_acc == "falsa", 1, 0),
           false_2_false = ifelse(f_headline_2_acc == "falsa", 1, 0),
           false_3_false = ifelse(f_headline_3_acc == "falsa", 1, 0),
           false_4_false = ifelse(f_headline_4_acc == "falsa", 1, 0),
           false_1_true = ifelse(f_headline_1_acc == "verdadeira", 1, 0),
           false_2_true = ifelse(f_headline_2_acc == "verdadeira", 1, 0),
           false_3_true = ifelse(f_headline_3_acc == "verdadeira", 1, 0),
           false_4_true = ifelse(f_headline_4_acc == "verdadeira", 1, 0),
           true_1_true = ifelse(t_headline_5_acc == "verdadeira", 1, 0),
           true_2_true = ifelse(t_headline_6_acc == "verdadeira", 1, 0),
           true_3_true = ifelse(t_headline_7_acc == "verdadeira", 1, 0),
           true_4_true = ifelse(t_headline_8_acc == "verdadeira", 1, 0),
           true_1_false = ifelse(t_headline_5_acc == "falsa", 1, 0),
           true_2_false = ifelse(t_headline_6_acc == "falsa", 1, 0),
           true_3_false = ifelse(t_headline_7_acc == "falsa", 1, 0),
           true_4_false = ifelse(t_headline_8_acc == "falsa", 1, 0)) %>%
    # Composite counts. Plain vectorised addition gives the same per-row totals
    # as rowwise() + sum() (an NA in any item still yields NA) without
    # evaluating the expression once per row.
    mutate(false_false_sum = false_1_false + false_2_false + false_3_false + false_4_false,
           false_true_sum = false_1_true + false_2_true + false_3_true + false_4_true,
           true_true_sum = true_1_true + true_2_true + true_3_true + true_4_true,
           true_false_sum = true_1_false + true_2_false + true_3_false + true_4_false,
           # disc = number of headlines classified correctly (false rated false
           # plus true rated true).
           disc = false_false_sum + true_true_sum)

# A couple of sanity checks.
table(d$false_1_false, d$f_headline_1_acc)
table(d$false_2_false, d$f_headline_2_acc)
table(d$false_3_false, d$f_headline_3_acc)
table(d$false_4_false, d$f_headline_4_acc)
table(d$false_false_sum, d$f_headline_1_acc)
table(d$true_true_sum, d$t_headline_5_acc)

# Recode exposure. same logic as in accuracy judgments.
# Observed answers of the first exposure item.
table(d$f_headline_1_exp)
# Exposure dummies: 1 when the respondent answered "sim" (yes) to having seen
# the headline, 0 otherwise; NA answers stay NA.
d <- d %>%
    mutate(false_1_exp = ifelse(f_headline_1_exp == "sim", 1, 0),
           false_2_exp = ifelse(f_headline_2_exp == "sim", 1, 0),
           false_3_exp = ifelse(f_headline_3_exp == "sim", 1, 0),
           false_4_exp = ifelse(f_headline_4_exp == "sim", 1, 0),
           true_1_exp = ifelse(t_headline_5_exp == "sim", 1, 0),
           true_2_exp = ifelse(t_headline_6_exp == "sim", 1, 0),
           true_3_exp = ifelse(t_headline_7_exp == "sim", 1, 0),
           true_4_exp = ifelse(t_headline_8_exp == "sim", 1, 0)) %>%
    # Number of false / true headlines seen. Vectorised addition replaces
    # rowwise() + sum(); the per-row totals (and NA propagation) are the same.
    # NOTE(review): "true_news_e" is not parallel to "false_news_exp"; the name
    # is kept because code outside this file may rely on it.
    mutate(false_news_exp = false_1_exp + false_2_exp + false_3_exp + false_4_exp,
           true_news_e = true_1_exp + true_2_exp + true_3_exp + true_4_exp)


# Confidence in headline responses ---------------------------------------------------
# Observed response labels of the confidence item, in the order returned by
# table(); the positions below refer to that ordering.
names_trust <- names(table(d$q_confidence))
# Translate from PT to English: English label (name) -> original label (value),
# listed from least to most confident.
trust_list <- list(
    "not confident at all" = names_trust[[4]],
    "not very confident" = names_trust[[3]],
    "somewhat confident" = names_trust[[5]],
    "confident" = names_trust[[1]],
    "very confident" = names_trust[[2]]
)

# Recode the confidence item in two steps: translate, then order the levels as
# listed in trust_list.
d <- d %>%
    mutate(
        q_confidence = fct_recode(q_confidence, !!!trust_list),
        q_confidence = fct_relevel(q_confidence, names(trust_list))
    )


# Political identity ------------------------------------------------------------------

# recode vote choice in the runoff presidential election
# Vote choice in the runoff presidential election, collapsed to three groups.
# Any answer other than the two candidates (including NA) falls into
# "Null/Undecided/Absent".
d <- d %>%
    mutate(
        q_runoff = case_when(
            q_runoff == "lula" ~ "Left",
            q_runoff == "jair bolsonaro" ~ "Right",
            TRUE ~ "Null/Undecided/Absent"
        ),
        vote_left = ifelse(q_runoff == "Left", 1, 0),
        vote_right = ifelse(q_runoff == "Right", 1, 0),
        # Partisans are the respondents who voted for one of the two candidates.
        partisans = ifelse(q_runoff %in% c("Left", "Right"), 1, 0)
    )

table(d$q_runoff)

# Positive and negative partisanship towards the PT.
d <- d %>%
    mutate(
        partisans_pt = ifelse(
            positive_part == "partido dos trabalhadores (pt)", 1, 0
        ),
        anti_partisans_pt = ifelse(
            negative_part == "partido dos trabalhadores (pt)", 1, 0
        )
    )



# Trust variables  ------------------------------------------------------------

# get labels
# Observed response labels of the first trust item.
trust_name <- names(table(d$q_trust_out_1))

# Clean the trust items.
d <- d %>%
    # Strip the textual anchors from the scale end points and convert the
    # remaining text to numbers.
    mutate(across(
        contains("q_trust_"),
        ~ as.numeric(str_remove_all(.x, "- nao confia nada| - confia muito"))
    )) %>%
    # Descriptive column names.
    rename(
        q_trust_government = q_trust_out_1,
        q_trust_congress = q_trust_out_2,
        q_trust_electoral_authorities = q_trust_out_3,
        q_trust_globo = q_trust_out_4,
        q_trust_news_channels = q_trust_out_5
    )

# check
# Trust items that enter the index.
trust_vars <- c("q_trust_government",
                "q_trust_congress",
                "q_trust_electoral_authorities",
                "q_trust_globo",
                "q_trust_news_channels")

# Trust index.
# all_of() replaces the deprecated selection with a bare external vector.
d <- d %>%
    # Replace each item by its z-score.
    mutate(across(all_of(trust_vars), center_scale)) %>%
    # Sum of the z-scores -> trust_zcore.
    zscores_index(trust_vars, "trust_zcore") %>%
    # First principal component and standardized alpha.
    tidy_pca(trust_vars, scores_name = "trust_pcaindex", alpha_name = "trust_alpha")


# Check values.
d %>%
    select(contains("trust_"))

# ideology variables ----------------------------------------------------------------

# rename, convert to numeric, and calculate distance between person and the candidate
# Ideology placements: rename, convert to numeric, and compute squared
# distances between the respondent and each candidate.
d <- d %>%
    rename(
        ideology_self = q_ideology_out_1,
        ideology_lula = q_ideology_out_7,
        ideology_bolsonaro = q_ideology_out_8
    ) %>%
    # Every column whose name contains "ideology_" is converted.
    mutate(across(contains("ideology_"), ~ as.numeric(.x))) %>%
    mutate(
        distance_candidates = (ideology_lula - ideology_bolsonaro)^2,
        self_to_left = (ideology_self - ideology_lula)^2,
        self_to_right = (ideology_self - ideology_bolsonaro)^2
    )


# False Polarization as in Ender  and Armaly, 2019
# False polarization (the section header cites Enders and Armaly, 2019).
# Respondents outside the "Left"/"Right" groups have no outgroup, so all three
# polarization measures are NA for them.
d <- d %>%
    # Perceived polarization = |self position - position attributed to the
    # outgroup candidate|.
    mutate(perceived_polarization = case_when(
        q_runoff == "Left" ~ abs(ideology_self - ideology_bolsonaro),
        q_runoff == "Right" ~ abs(ideology_self - ideology_lula)
    )) %>%
    # Mean self-placement of the respondent's own group of voters (kept as is).
    group_by(q_runoff) %>%
    mutate(mean_position = mean(ideology_self, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(
        # Mean self-placement of the OUTGROUP voters. The previous code
        # subtracted the respondent's own-group mean (mean_position), which
        # contradicted its own comment ("mean position of the outgroup").
        mean_position_outgroup = case_when(
            q_runoff == "Left" ~ mean(ideology_self[q_runoff == "Right"], na.rm = TRUE),
            q_runoff == "Right" ~ mean(ideology_self[q_runoff == "Left"], na.rm = TRUE)
        ),
        # Actual polarization = |self position - mean position of the outgroup|.
        actual_polarization = abs(ideology_self - mean_position_outgroup),
        false_polarization = abs(actual_polarization - perceived_polarization)
    )

# Check.
mean(d$perceived_polarization, na.rm = TRUE)
mean(d$actual_polarization, na.rm = TRUE)

# Legitimacy of the Eletronic Voting --------------------------------------

# recode the variables
# Recode the legitimacy item with the same label mapping (trust_list) used for
# the confidence item: translate first, then order the levels as listed there.
d <- d %>%
    mutate(
        q_legitimacy = fct_recode(q_legitimacy, !!!trust_list),
        q_legitimacy = fct_relevel(q_legitimacy, names(trust_list))
    )

# Affective Polarization --------------------------------------------------

d <- d %>%
    # Strip the textual anchors from the scale end points and convert the
    # remaining text to numbers (applies to every column containing "affective_").
    mutate(across(
        contains("affective_"),
        ~ as.numeric(str_remove_all(.x, "- nao gosto nada|- gosto muito"))
    )) %>%
    # Descriptive column names.
    rename(
        affective_pol_lula = affective_pol_1,
        affective_pol_bolsonaro = affective_pol_2,
        affective_pol_lgbt = affective_pol_3,
        affective_pol_evangelicals = affective_pol_4
    ) %>%
    mutate(
        # Measures defined for all respondents: absolute rating gaps.
        affective_pol_diff_all = abs(affective_pol_lula - affective_pol_bolsonaro),
        affective_social_diff_all = abs(affective_pol_lgbt - affective_pol_evangelicals),
        # Measures defined for partisans only: ingroup rating minus outgroup
        # rating; NA for respondents who are neither "Left" nor "Right".
        affect_pol_diff_partisans = case_when(
            q_runoff == "Left" ~ affective_pol_lula - affective_pol_bolsonaro,
            q_runoff == "Right" ~ affective_pol_bolsonaro - affective_pol_lula
        ),
        affect_pol_social_partisans = case_when(
            q_runoff == "Left" ~ affective_pol_lgbt - affective_pol_evangelicals,
            q_runoff == "Right" ~ affective_pol_evangelicals - affective_pol_lgbt
        )
    )


# Social polarization -----------------------------------------------------

# summing the number of actions participants are not willing to do with outgorups
# Count the activities respondents are not willing to do with outgroup members.
d <- d %>%
    rename(
        q_social_pol_friends = q_social_pol_1,
        q_social_pol_dating = q_social_pol_2,
        q_social_pol_soccer = q_social_pol_3,
        q_social_pol_children = q_social_pol_4,
        q_social_pol_christmas = q_social_pol_5,
        q_social_pol_whatsapp = q_social_pol_6
    ) %>%
    # 1 when the item has any answer, 0 when it is missing.
    # NOTE(review): presumably a multi-select question where unticked options
    # are stored as NA - confirm against the questionnaire.
    mutate(across(contains("q_social_pol"), ~ ifelse(is.na(.x), 0, 1))) %>%
    rowwise() %>%
    # Row total over the six items (selected as a positional column range).
    mutate(social_pol_agg = sum(
        c_across(q_social_pol_friends:q_social_pol_whatsapp)
    )) %>%
    ungroup()




# Policy Preferences -------------------------------------------------------

# Clean policy preference
# Clean the policy preference items.
d <- d %>%
    # Strip the textual anchors from the scale end points and turn each item
    # into a factor.
    mutate(across(
        contains("policy_sup"),
        ~ as.factor(str_remove_all(.x, "- discordo muito| - concordo muito"))
    )) %>%
    # Items 2 and 4 are reversed so that higher values = more to the left.
    mutate(
        policy_sup_2 = fct_rev(policy_sup_2),
        policy_sup_4 = fct_rev(policy_sup_4)
    ) %>%
    # Integer level codes of each factor.
    mutate(across(contains("policy_sup"), ~ as.numeric(.x)))

# double check this above

# logic here:
## take the difference between policy stance and ingroup policy stance. 
## polarization means respondent i is farther apart toward the extreme of the distribution compared
## to the ingroup average position. Therefore, we need to multiply the right by -1. 

# Policy polarization: distance between each respondent's policy stance and the
# mean stance of their own voting group (q_runoff).
d <- d %>%
    group_by(q_runoff) %>%
    # Group mean of every policy item, stored as "<item>_mean". The lambda
    # replaces passing `na.rm = TRUE` through across()'s `...`, which is
    # deprecated in dplyr.
    mutate(across(contains("policy_sup"),
                  ~ mean(.x, na.rm = TRUE),
                  .names = "{col}_mean")) %>%
    ungroup() %>%
    # Difference between the individual and the in-party position.
    mutate(policy_polarization_1 = policy_sup_1 - policy_sup_1_mean,
           policy_polarization_2 = policy_sup_2 - policy_sup_2_mean,
           policy_polarization_3 = policy_sup_3 - policy_sup_3_mean,
           policy_polarization_4 = policy_sup_4 - policy_sup_4_mean,
           policy_polarization_5 = policy_sup_5 - policy_sup_5_mean) %>%
    # Multiply by -1 for the right, so that moving further right and moving
    # further left count as the same movement. NA for respondents who are
    # neither "Left" nor "Right".
    mutate(across(contains("policy_polarization_"),
                  ~ case_when(q_runoff == "Right" ~ -1 * .x,
                              q_runoff == "Left" ~ .x),
                  .names = "{col}_directional")) %>%
    # Absolute values as well. Each column now uses its own item; previously
    # items 2-5 were all computed from policy_polarization_1 (copy-paste bug),
    # which made mean_abs_policy_polarization equal to item 1 alone.
    mutate(abs_policy_polarization_1 = abs(policy_polarization_1),
           abs_policy_polarization_2 = abs(policy_polarization_2),
           abs_policy_polarization_3 = abs(policy_polarization_3),
           abs_policy_polarization_4 = abs(policy_polarization_4),
           abs_policy_polarization_5 = abs(policy_polarization_5)) %>%
    rowwise() %>%
    # Row means over the five items; a missing item makes the row mean NA.
    mutate(mean_abs_policy_polarization = mean(c_across(abs_policy_polarization_1:abs_policy_polarization_5)),
           mean_policy_polarization_direction = mean(c_across(policy_polarization_1_directional:policy_polarization_5_directional))) %>%
    ungroup()


# Polarization z-score index ------------------------------------------------------

# select vars for measures using the entire samples
# Measures available for the entire sample.
pol_vars <- c("distance_candidates",
              "affective_pol_diff_all",
              "social_pol_agg",
              "mean_abs_policy_polarization")

# Polarization index over the whole data.
# Variables: ideology distance, social polarization, affective polarization
# (all respondents), and mean absolute policy polarization.
# all_of() replaces the deprecated selection with a bare external vector.

d <- d %>%
    # Replace each measure by its z-score.
    mutate(across(all_of(pol_vars), center_scale)) %>%
    # Sum of the z-scores -> pol_all_zcore.
    zscores_index(pol_vars, "pol_all_zcore") %>%
    # First principal component and standardized alpha.
    tidy_pca(pol_vars, scores_name = "pol_all_pcaindex", alpha_name = "pol_all_alpha")


# Outgroup polarization index. Here only using participants who have a presidential candidate
# variables: false_polarization, social_pol, affective_outgroup, mean(_policy_polarization_1-5)
# calculate index pca

# Measures defined only for respondents with a presidential candidate.
pol_vars_p <- c("false_polarization",
                "affect_pol_diff_partisans",
                "social_pol_agg", "mean_policy_polarization_direction")

# Outgroup polarization z-score index.
# all_of() replaces the deprecated selection with a bare external vector.
# Note that social_pol_agg was already standardized for the whole-sample index
# and is standardized again here.
d <- d %>%
    # Replace each measure by its z-score.
    mutate(across(all_of(pol_vars_p), center_scale)) %>%
    # Sum of the z-scores -> pol_partisans_zscore (NA when any measure is NA).
    zscores_index(pol_vars_p, "pol_partisans_zscore")

# #calculate pca with missing values. I am not using this in the paper. Just checking the difference between zscore and pca. 
# 
# input = d[pol_vars_p]  
# #PCA_results <- principal(input, nfactors = 1)
# #as.numeric(PCA_results$scores)
# alpha = psych::alpha(input, 'check.keys=TRUE')$total$std.alpha 
# prcomp <- prcomp(na.omit(input))
# scores <- prcomp$x[,1]
# temp <- bind_cols(na.omit(d %>% 
#                               select(q_email, pol_vars_p)),as_tibble_col(scores, column_name = "pol_partisans_pca"), as_tibble_col(alpha, column_name = "alpha_pol_partisans")) %>%
#     select(q_email, pol_partisans_pca, alpha_pol_partisans)
# 
# # merge
# d <- left_join(d, temp)
# summary(lm(pol_partisans_pca ~ exp, data=d))
# summary(lm(pol_partisans_zscore ~ exp, data=d))

# Substitution effects --------------------------------------------------------------

# get scale
# Observed response labels of the first news-source item, in the order returned
# by table(); the positions below refer to that ordering.
names_news <- names(table(d$q_news_source_1))

# English label (name) -> original label (value), from "Much less" to "Much more".
news_list <- list(
    "Much less" = names_news[[4]],
    "Less" = names_news[[2]],
    "Same" = names_news[[5]],
    "More" = names_news[[1]],
    "Much more" = names_news[[3]]
)

# Recode all news-source (substitution) items.
d <- d %>%
    # Translate from PT to English and order the levels as listed in news_list.
    mutate(across(
        contains("q_news_source"),
        ~ fct_relevel(fct_recode(.x, !!!news_list), names(news_list))
    )) %>%
    # Descriptive column names.
    rename(
        q_subs_other_social_media = q_news_source_1,
        q_subs_reading_news_internet = q_news_source_2,
        q_subs_watching_tv = q_news_source_3,
        q_subs_offline_friends = q_news_source_4,
        q_subs_away_phone = q_news_source_5
    )



# Substitution Social Media apps ------------------------------------------
# Which apps respondents report using more. Item 6 is not renamed here.
d <- d %>%
    rename(
        used_more_whatsapp = q_social_media_usage_1,
        used_more_facebook = q_social_media_usage_2,
        used_more_twitter = q_social_media_usage_3,
        used_more_tiktok = q_social_media_usage_4,
        used_more_telegram = q_social_media_usage_5,
        used_more_youtube = q_social_media_usage_7,
        used_more_instagram = q_social_media_usage_8,
        used_more_none = q_social_media_usage_9
    ) %>%
    # 1 when the option has any answer, 0 when it is missing.
    mutate(across(contains("used_more"), ~ ifelse(is.na(.x), 0, 1)))


# Whatsapp usage ----------------------------------------------------------------

# how often do you use WhatsApp? Participants selections criteria
# How often do you use WhatsApp? (participant selection criterion)
# Observed response labels, in the order returned by table(); the positions
# below refer to that ordering.
levels_whatsapp <- names(table(d$q_whatsapp))

# English label (name) -> original label (value), from least to most usage.
list_whatsapp <- list(
    "Not every day" = levels_whatsapp[[5]],
    "Less than 10 min" = levels_whatsapp[[7]],
    "10 ~ 30 minutes" = levels_whatsapp[[2]],
    "30 ~ 1 hour" = levels_whatsapp[[4]],
    "1 ~ 2 hours" = levels_whatsapp[[1]],
    "2 ~ 4 hours" = levels_whatsapp[[3]],
    "+4 hours" = levels_whatsapp[[6]]
)

# Translate the labels, then order the levels as listed in list_whatsapp.
d <- d %>%
    mutate(q_whatsapp = fct_relevel(
        fct_recode(q_whatsapp, !!!list_whatsapp),
        names(list_whatsapp)
    ))



# Receive fake news? ------------------------------------------------------

# Observed response labels, in the order returned by table(); the positions
# below refer to that ordering.
levels_wp_fake <- names(table(d$q_fakenews))

# English label (name) -> original label (value), from most to least frequent,
# with "Don’t Know" last.
list_wp_fake <- list("At least 10 times a day" = levels_wp_fake[[6]],
                     "Several times a day" = levels_wp_fake[[7]],
                     "About once a day" = levels_wp_fake[[3]],
                     "3 to 6 days a week" = levels_wp_fake[[4]],
                     "1 to 2 days a week" = levels_wp_fake[[1]],
                     "Every few weeks" = levels_wp_fake[[2]],
                     "Don’t Know" = levels_wp_fake[[5]])

d <- d %>%
    mutate(q_fakenews = fct_recode(q_fakenews, !!!list_wp_fake), # recoding
           q_fakenews = fct_relevel(q_fakenews, names(list_wp_fake)), # reordering
           # Numeric version (integer level codes) with "Don’t Know" set to NA.
           # The label above uses a typographic apostrophe, while the old test
           # compared against "Don't Know" with a straight one, so it never
           # matched and "Don’t Know" kept its level code. Both spellings are
           # accepted here.
           q_fake_news = ifelse(q_fakenews %in% c("Don’t Know", "Don't Know"),
                                NA,
                                as.integer(q_fakenews)))



# variable for compliance self-reported ----------------------------------------------------
# Observed response labels of the control-group item, in the order returned by
# table(); the positions below refer to that ordering.
comp <- names(table(d$q_com_direct_control))
# English label (name) -> original label (value). `comp` is overwritten with
# the mapping.
comp <- list("At least 10 times a day" = comp[[7]],
             "Several times a day" = comp[[8]],
             "About once a day" = comp[[3]],
             "3 to 6 days a week" = comp[[4]],
             "1 to 2 days a week" = comp[[1]],
             "Every few weeks" = comp[[2]],
             "Never" = comp[[6]],
             "Don’t Know" = comp[[5]])


# Translate both items and order their levels as listed in `comp`.
d <- d %>%
    mutate(across(c(q_com_direct_control, q_compliance_direct),
                  ~ fct_relevel(fct_recode(.x, !!!comp), names(comp))))

# There were two different variables, one for control and one for treatment.
# Below they are combined into a single variable.

d <- d %>%
    # NA -> "" so that pasting the two columns keeps whichever one is filled.
    mutate(across(c(q_com_direct_control, q_compliance_direct),
                  ~ ifelse(is.na(.x), "", as.character(.x)))) %>%
    mutate(compliance_self_reported = str_trim(paste0(q_compliance_direct, q_com_direct_control)),
           # "Don’t Know" and empty answers become NA. The label above uses a
           # typographic apostrophe, while the old test compared against
           # "Don't Know" with a straight one, so "Don’t Know" was never set to
           # NA. Both spellings are accepted here.
           compliance_self_reported = ifelse(
               compliance_self_reported %in% c("Don’t Know", "Don't Know", ""),
               NA_character_,
               as.character(compliance_self_reported)),
           # Order the remaining levels as listed in `comp`; "Don’t Know" is
           # left out because it can no longer occur.
           compliance_self_reported = fct_relevel(compliance_self_reported,
                                                  setdiff(names(comp), "Don’t Know")))


# Measure for which Topics the images were referring to  -------------------------------------------------------

d <- d %>%
    # These are multi-field Qualtrics questions, so NAs are first converted to
    # empty strings (treatment items, then control items).
    mutate(across(
        q_compliance_1:q_compliance_7,
        ~ ifelse(is.na(.x), "", as.character(.x))
    )) %>%
    mutate(across(
        q_compliance_control_1:q_compliance_control_7,
        ~ ifelse(is.na(.x), "", as.character(.x))
    )) %>%
    # One column per topic: treatment and control answers pasted together.
    mutate(
        media_sports = str_trim(paste0(q_compliance_1, q_compliance_control_1)),
        media_politics = str_trim(paste0(q_compliance_4, q_compliance_control_4)),
        media_family_friends = str_trim(paste0(q_compliance_5, q_compliance_control_5)),
        media_memes = str_trim(paste0(q_compliance_6, q_compliance_control_6)),
        media_work = str_trim(paste0(q_compliance_7, q_compliance_control_7))
    )




# Spillover variable ---------------------------------------------------------------

# Spillover dummy: 1 when the answer is "sim" (yes), 0 otherwise; NA stays NA.
d$q_spillover <- ifelse(d$q_spillover == "sim", 1, 0)

# replace na values for covariates to be added to the models with median values. 
# Replace missing values of the selected column(s) by the column median.
#   data: data frame.
#   x:    unquoted column name (or tidyselect expression) to impute.
# Returns `data` with the selected column(s) imputed.
# NOTE(review): the result goes through ifelse(), which drops attributes, so a
# factor column comes back as its integer level codes; and median() is only
# evaluated when the column actually has missing values. Confirm that this is
# the intended treatment for the non-numeric covariates this is applied to.
replace_na_to_median <- function(data, x) {
    mutate(
        data,
        across({{ x }}, ~ ifelse(is.na(.x), median(.x, na.rm = TRUE), .x))
    )
}

# apply
# Impute the covariates that enter the models, one column at a time.
d <- replace_na_to_median(d, w1_q_gender)
d <- replace_na_to_median(d, q_race)
d <- replace_na_to_median(d, income_num)
d <- replace_na_to_median(d, w1_q_age_num)
d <- replace_na_to_median(d, w1_trust_zcore)
d <- replace_na_to_median(d, w1_affective_pol_diff_all)
d <- replace_na_to_median(d, w1_ideology_you)
d <- replace_na_to_median(d, w1_q_politics_num)
d <- replace_na_to_median(d, w1_q_whatsapp_num)


# save --------------------------------------------------------------------

# Rdata ~ preserves ordering in the cleaning
# Write the processed data to data/all_processed_data.rds.
saveRDS(object = d, file = here("data", "all_processed_data.rds"))

